# coding:utf-8
import base64
import random, re
import sqlite3
import redis, pickle
import json, time
import urllib3, urllib2, hashlib
from datetime import datetime
import threading
import logging.handlers
import sys

# Python 2 only: reloading ``sys`` makes ``setdefaultencoding`` callable again,
# and the call below switches the implicit str <-> unicode conversion codec to
# UTF-8. Code further down (e.g. ``str(...)`` on text fields) relies on this.
reload(sys)
sys.setdefaultencoding('utf-8')
import uuid

import requests

# Module-wide HTTP session reused by every request the crawler makes.
session = requests.session()


def md5(str):
    """Return the hexadecimal MD5 digest of ``str``.

    Used to turn a link into a fixed-length value that can serve as a unique
    primary key.

    Args:
        str: The value to hash. Byte strings are hashed as-is; text
            (``unicode`` on Python 2, ``str`` on Python 3) is encoded as
            UTF-8 first, so the result does not depend on the interpreter's
            default encoding.

    Returns:
        The 32-character lowercase hex digest.
    """
    import hashlib

    # ``type(u"")`` is the text type on both Python 2 and Python 3.
    data = str.encode("utf-8") if isinstance(str, type(u"")) else str
    return hashlib.md5(data).hexdigest()


def _extract_article(html):
    """Return the text captured between ``<article>`` tags, or "" if none."""
    try:
        match = re.search(' <article>(.*?)</article>', html)
    except TypeError:
        # Pattern and page body are of incompatible string types.
        return ""
    return match.group(1) if match else ""


def _save_feed_item(cx, item, article_headers, timeout):
    """Download one feed item's article page and insert it into ``toutiao``."""
    article_url = "http://www.toutiao.com" + str(item["source_url"])
    try:
        html = session.get(url=article_url, headers=article_headers,
                           timeout=timeout).content
    except requests.RequestException as e:
        # Best effort: keep the item, but store empty content rather than
        # whatever page happened to be downloaded for a previous item.
        print(e)
        html = ""
    content = _extract_article(html)

    print("正在插入链接   %s   数据" % article_url)

    # Order matches the column list of the INSERT statement below.
    values = [
        item["title"],
        item["chinese_tag"],
        item["media_avatar_url"],
        item["is_feed_ad"],
        item["tag_url"],
        item["tag"],
        str(item["label"]),
        item["abstract"],
        item["source_url"],
    ]
    for value in values:
        print(value)

    row = tuple(str(value) for value in values)
    # The ``url`` column holds the MD5 of the article URL, not the URL itself.
    row += (str(md5(str(article_url))), str(content))
    cx.execute(
        "INSERT INTO toutiao (title,chinese_ta,media_avatar_url,is_feed_ad,tag_url,tag,label,abstract,source_url,url,contentData)VALUES (?,?,?,?,?,?,?,?,?,?,?)",
        row)
    cx.commit()


def jinri(db_path="C:\\Users\\xuchunlin\\PycharmProjects\\study\\db.sqlite3",
          timeout=10):
    """Crawl the Toutiao ``news_game`` feed and store each article in SQLite.

    Requests 19 feed pages (``widen`` = 1..19). For every item on a page whose
    status is "success", the article page is downloaded, the ``<article>``
    body is extracted, and one row is inserted into the ``toutiao`` table.
    A failure on one page or one item is printed and the crawl continues.

    Args:
        db_path: Path of the SQLite database file containing the ``toutiao``
            table. Defaults to the path the script has always used.
        timeout: Per-request timeout in seconds, so that a stalled connection
            cannot hang the crawl indefinitely.

    Returns:
        An empty list. Nothing is ever appended to it; it is kept only so
        existing callers that print or inspect the result keep working.

    Raises:
        sqlite3.Error: If the database cannot be opened.
    """
    list_data = []

    feed_url = "http://www.toutiao.com/api/pc/feed/"
    # Feed and article requests use separate header sets under separate
    # names, so fetching an article cannot change what later feed requests send.
    feed_headers = {
        "Host": "www.toutiao.com",
        "Connection": "keep-alive",
        "Accept": "text/javascript, text/html, application/xml, text/xml, */*",
        "X-Requested-With": "XMLHttpRequest",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
        "Content-Type": "application/x-www-form-urlencoded",
        "Referer": "http://www.toutiao.com/ch/news_hot/",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.8",
    }
    article_headers = {
        "Host": "www.toutiao.com",
        "Connection": "keep-alive",
        "Cache-Control": "max-age=0",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.8",
    }

    # One connection for the whole crawl; ``finally`` guarantees it is closed.
    cx = sqlite3.connect(db_path, check_same_thread=False)
    cx.text_factory = str
    try:
        for page in range(1, 20):
            params = {
                "category": "news_game",
                "utm_source": "toutiao",
                "widen": str(page),
                "max_behot_time": "0",
                "max_behot_time_tmp": "0",
                "tadrequire": "true",
                "as": "479BB4B7254C150",
                "cp": "7E0AC8874BB0985",
            }
            try:
                response = session.get(url=feed_url, params=params,
                                       headers=feed_headers, timeout=timeout)
                payload = json.loads(response.text)
            except (requests.RequestException, ValueError) as e:
                # Network error or a non-JSON body: skip this page only.
                print(e)
                print("请求失败")
                continue

            # NOTE(review): the status is read from "message"; "message1" (the
            # key this code originally looked up) is kept as a fallback.
            # Confirm against a live response and drop whichever is unused.
            status = None
            if isinstance(payload, dict):
                status = payload.get("message", payload.get("message1"))
            if status != "success":
                print("请求失败")
                continue

            for item in payload.get("data") or []:
                try:
                    _save_feed_item(cx, item, article_headers, timeout)
                except Exception as e:
                    # Per-item boundary: report the failure and move on so one
                    # malformed item or failed insert does not end the crawl.
                    print(e)
                    print("cha ru shi bai ")
    finally:
        cx.close()
    return list_data

# Run the crawl when the module is loaded and show what it returned.
crawl_result = jinri()
print(crawl_result)
